df <- read.csv("CrowNestlingClimate.csv", h=TRUE)


#Select variables
df <- df %>% 
  select(Year,Name,NestName,ID,AgeField,CalcAge,HatchDateJul,HatchDateJulYear,AllSex,BillNT,BillWidth,BillDepth,TEC,Head,UpperBill,UBillSurface,TotBillSurface,Skull,Tarsus,Weight)

#Rename variables
df <- df %>% 
  rename(FieldAge=AgeField,BNT=BillNT,BW=BillWidth,BD=BillDepth,UB=UpperBill,UBS=UBillSurface,TBS=TotBillSurface)


#Count NAs
countNAs <- sapply(df, function(x) sum(is.na(x)))
countNAs
##             Year             Name         NestName               ID 
##                0                3                0                0 
##         FieldAge          CalcAge     HatchDateJul HatchDateJulYear 
##              162                4                3                3 
##           AllSex              BNT               BW               BD 
##                0               11               15               15 
##              TEC             Head               UB              UBS 
##               12               13                2                2 
##              TBS            Skull           Tarsus           Weight 
##                0               16                8               20
#Remove NAs
df <- df %>% 
  filter_at(vars(Weight,HatchDateJul,BD,Tarsus,Skull,BW), all_vars(!is.na(.)))

#Recount NAs
countNAs <- sapply(df, function(x) sum(is.na(x)))
countNAs
##             Year             Name         NestName               ID 
##                0                3                0                0 
##         FieldAge          CalcAge     HatchDateJul HatchDateJulYear 
##              160                0                0                0 
##           AllSex              BNT               BW               BD 
##                0                0                0                0 
##              TEC             Head               UB              UBS 
##                0                0                0                0 
##              TBS            Skull           Tarsus           Weight 
##                0                0                0                0


#filter out names with 'doa' or 'dead'
df <- df %>% filter(!grepl("doa|dead",Name))

#filter out Weights < 160 
df <- df %>% filter(Weight > 160)
range(df$Weight)
## [1] 163 500
WeightByFieldAge.plot <- ggplot(data = df, aes(x=FieldAge,y=Weight,label=ID,color=HatchDateJul))+
  geom_point()
ggplotly(WeightByFieldAge.plot)
df <- df %>% filter(between(CalcAge, 24,30))
range(df$CalcAge)
## [1] 24.0 29.9
WeightByCalcAge.plot <- ggplot(data = df, aes(x=CalcAge,y=Weight,label=ID))+
  geom_point()
ggplotly(WeightByCalcAge.plot)
climate.df <- read.csv("ClimateMetrics.csv", h=TRUE)
df <- left_join(df,climate.df, by = "HatchDateJulYear")
df <- df %>% relocate(Date, .before = AllSex)
#write.csv(df, "AllNestlingsClimateJoined.csv")
#variables that don't get scaled 
DataNotScaled.df <- df[,1:10]

#Numerical data that do get scaled
DataToScale.df <- df[,11:39]

#Scale those data
Scaled.df <- scale(DataToScale.df)

#Rejoin with variables that don't get scaled
scaled.df <- cbind(DataNotScaled.df,Scaled.df)
BD.scaled.mdl <- lm(data = scaled.df, BD ~ GDDSum12_22 * PrecipSum12_22 + Weight + CalcAge)
summary(BD.scaled.mdl)
## 
## Call:
## lm(formula = BD ~ GDDSum12_22 * PrecipSum12_22 + Weight + CalcAge, 
##     data = scaled.df)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -3.0716 -0.4851 -0.0221  0.4559  3.6906 
## 
## Coefficients:
##                            Estimate Std. Error t value Pr(>|t|)    
## (Intercept)                -4.02712    0.42291  -9.522  < 2e-16 ***
## GDDSum12_22                 0.14414    0.01708   8.441  < 2e-16 ***
## PrecipSum12_22             -0.01170    0.01681  -0.696    0.487    
## Weight                      0.58078    0.01796  32.338  < 2e-16 ***
## CalcAge                     0.15426    0.01616   9.546  < 2e-16 ***
## GDDSum12_22:PrecipSum12_22 -0.07563    0.01559  -4.852 1.32e-06 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.7508 on 2042 degrees of freedom
## Multiple R-squared:  0.4376, Adjusted R-squared:  0.4363 
## F-statistic: 317.8 on 5 and 2042 DF,  p-value: < 2.2e-16